import time
import os
import requests
from lxml import etree
import hashlib

# 导入Excel工具库
import xlwt


class CSP():
    def __init__(self, uname='cx018', password=b'150201') -> None:
        # 登录态
        self.requests = requests.session()
        self.url = "http://101.33.254.221/"
        self.uname = uname
        self.password = hashlib.md5()
        self.password.update(password)
        print(self.password.hexdigest())
        self.headers = {
            'Host': '101.33.254.221',
            'Origin': 'http://101.33.254.221',
            'Referer': 'http://101.33.254.221/loginpage.php',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36 Edg/96.0.1054.29',
        }

    def login(self):
        data = {
            "user_id": self.uname,
            "password": self.password.hexdigest(),
            'submit': '',
            'csrf': 'IuTTIGXuBNAqXX6Fmg6otdxB0wGVeKsG'
        }
        url = self.url+'login.php'
        self.requests.post(
            url=url, data=data, headers=self.headers)

    def problemset(self):
        wb = xlwt.Workbook()
        # 创建一个工作表，并命名courseId
        ws = wb.add_sheet('list')
        qlist = []
        for x in range(1, 15):
            url = self.url+'problemset.php?page='+str(x)
            # print(url)
            problemset = self.requests.get(url)
            html = etree.HTML(problemset.text)
            tb = html.xpath('//*[@id="problemset"]/tbody/tr')
            for tr in tb:
                qtr = []
                qtr.append(tr[1].xpath('div/text()'))
                qtr.append(tr[2].xpath('div/a/text()'))
                qtr.append(tr[2].xpath('div/a/@href'))
                qtr.append(tr[3].xpath('div/a/text()'))
                qlist.append(qtr)
            time.sleep(1)
        # 遍历数据文件中的所有行
        row = 0
        for dd in qlist:
            # 初始化列计数器
            col = 0
            # 遍历每一行数据的所有列
            for x in dd:
                ws.write(row, col, x)
                # 每添加一列数据，列计数器就+1
                col = col + 1
            # 每添加一行数据，行计数器就+1
            row = row + 1
        # 保存Excel文件，并命名为：example.xls
        wb.save('qlist' + ".xls")
        for x in qlist:
            print(x)
            html = self.requests.get(self.url+str(x[2][0])).text
            html = etree.HTML(html).xpath(
                '//div[@class="panel panel-default"]')[0]
            img = html.xpath('//img/@src')
            html = str(etree.tostring(html).decode())
            i = 0
            img_list = []
            try:
                if(img):
                    for im in img:
                        print(im)
                        img_name = str(x[0][0])+str(i)+'.'+im.split('.')[-1]
                        print(img_name)

                        img_list.append([im, img_name])

                        if 'http' not in im:
                            img = self.requests.get(self.url+im)
                        else:
                            img = self.requests.get(im)

                        os.makedirs(os.path.dirname(
                            'qlist/'+img_name), exist_ok=True)
                        with open('qlist/'+img_name, mode='wb') as fp:
                            fp.write(img.content)
            except:
                pass
            for im in img_list:
                html = html.replace(im[0], im[1])
            os.makedirs(os.path.dirname(
                        'qlist/'+str(x[0][0])+'.html'), exist_ok=True)
            with open('qlist/'+str(x[0][0])+'.html', mode='w', encoding='utf-8') as fp:
                fp.write(html)
            time.sleep(1)


app = CSP()
app.login()
app.problemset()
